# Time Series Analysis Laboratory with R
# Prof. Lea Petrella
# Faculty of Economics
# Department of Methods and Models for Economics, Territory and Finance
# Sapienza University of Rome
# a.y. 2019-2020
#####################################
#####################################
# Lesson 1: Introduction to R (Part 2)
#####################################
# Index:
#
# 1) extraction and selection of elements
# 2) sorting in R
# 3) dataframe
# 4) data import
# 5) descriptive statistics
# 6) graphical representation
# 7) end of 2nd part ---> let's save everything!
########

# Recall the objects from Part 1
v1 <- seq(1, 10)
v1

matrix1 <- matrix(c(1:20), nrow = 5, ncol = 4)
matrix1

# remember the paste() function
colnames(matrix1) <- paste("column", 1:4, sep = "-")
# same thing for the rows
rownames(matrix1) <- paste("row", 1:5, sep = "-")
matrix1

height <- c(1.75, 1.80, 1.65, 1.90, 1.80, 1.71)
weight <- c(60, 72, 57, 90, 82, 72)
bmi <- weight / height^2 # Body Mass Index

#####################################
# 1) extraction and selection of elements
#####################################

# 1. based on location

# Extraction from a vector
v1
# extract the 4th element of v1
v1[4]
# extract the elements in positions 1, 3, 6
v1[c(1, 3, 6)]
# extract the elements from the 1st to the 3rd position
v1[c(1, 2, 3)]
# or
v1[1:3]
# It is possible to extract the same element several times:
# here element 1 three times, followed by element 3
v1[c(rep(1, 3), 3)]
# Elements can also be selected by negation (everything except 2 and 4)
v1[-c(2, 4)]

# Extraction from a matrix
matrix1
# extract the element in row 2, column 3 of matrix1
matrix1[2, 3]
# extract the 2nd row of matrix1 (empty column index = all columns)
line2 <- matrix1[2, ]
line2
# drop the column names carried over by the extracted row
names(line2) <- NULL
line2
# extract column 3 of matrix1 (empty row index = all rows)
column3 <- matrix1[, 3]
column3
names(column3) <- NULL
column3
# extract the elements of rows 2 and 3 of column 4 of matrix1
matrix1[c(2, 3), 4]
# extract the elements of rows 2 and 3 of columns 1 and 4 of matrix1
matrix1[c(2, 3), c(1, 4)]
# extract the elements located in (2,1) and (3,4) of matrix1:
# a two-column index matrix is read row by row as (row, column) pairs
matrix1[matrix(c(2, 3, 1, 4), nrow = 2, ncol = 2)]
# extract the "head" (first rows) of a matrix
head(matrix(1:50, nrow = 10, ncol = 5))

# 2. by label
# It is possible to extract elements from a matrix by using row and
# column names... of course the matrix must have row and column names!
matrix1["row-2", "column-3"]
matrix1["row-2", ]
matrix1[, "column-3"]

# 3. based on a "logical" condition
height
height >= 1.80
height[height >= 1.80]
height[height > 1.75 & height < 1.90]
height[height > 1.75 & height <= 1.90]

matrix1
matrix1 > 8
major8 <- matrix1[matrix1 > 8] # returns a vector!

#####################################
# 2) sorting in R
#####################################

# sort() returns the ordered vector
sort(height)
sort(height, decreasing = TRUE)
# order() returns the positions of the elements in sorted order
order(height)
# rev() rearranges the vector by reversing the order of its elements
rev(height)

#####################################
# 3) data.frame
#####################################

# Data frames are the main objects in statistical analysis.
# Classic format: units (rows) x variables (columns)

# How to create a data frame
df <- data.frame(height, weight)
# Each row of this object represents a statistical unit

# How to create an identifier for each unit
id <- seq(1, 6, 1)
df1 <- data.frame(id, height, weight)

# How do you access the variables of a data frame?
# By location
df[2]  # Returns the weight variable
df1[1] # Returns the variable id
# By label
df$weight # Returns the weight variable
df1$id    # Returns the variable id

# Variable names in the data frame
names(df1) # check the names
# Removing variable names
names(df1) <- NULL
# How to change the names of the data frame variables
names.eng <- c("identity", "Height", "Weight")
names(df1) <- names.eng

# How to order a data frame with respect to a variable
# (the columns are referenced through df1, using the new names)
df.ord <- df1[order(df1$Height), ]  # sorts units by height
df.ord1 <- df1[order(df1$Weight), ] # sorts units by weight

# First descriptive statistics of a data frame
summary(df1[2:3])

# How to extract a subset of data using the subset() function.
# Conditions are evaluated on the columns of df1, so they must use the
# current column names (identity, Height, Weight).
help(subset)
df1.subset <- subset(df1, Height >= 1.80)
df2.subset <- subset(df1, Height >= 1.80 & Weight > 80)
df3.subset <- subset(df1, Height >= 1.80 | Weight > 80)
df4.sub <- subset(df1, identity > 3)

# In case of missing values
df5.sub <- subset(df1, !is.na(Weight))
df6.sub <- subset(df1, !is.na(Weight) & !is.na(Height))

# How to select the entire data frame while omitting all missing values:
# complete.cases() returns one logical value per row, so it is used as a
# row index (note the trailing comma)
complete.cases(df1)
df1[complete.cases(df1), ]

###################################### Save! ######################################